RNA-Seq: expression comparison of lit, old and new data
Libraries required
library(plotly)
library(edgeR)
library(pheatmap)
library(viridis)
library(plyr)Data
load("input/SC_controls_rnaseq_salmon.tds.RData")
lab <- salmon
load("input/SC_controls_rnaseq_lit_salmon.tds.RData")
lit <- salmon
load("input/SC_controls_rnaseq_lab_june2021.tds.RData")
ns <- salmonExpression comparison of PND7/8
plot_expr <- function(s1, s2, p1, p2, n1, n2, num = 5000) {
library(robcor)
dat1 <- log2(sort(rowMeans(
s1@gene.counts[, grep(pattern = p1, x = colnames(s1@gene.counts), value = T)]
), decreasing = TRUE))
if(p2 == "PND14" | p2 == "PNW8"){
dat2 <- log2(sort((
s2@gene.counts[, grep(pattern = p2, x = colnames(s2@gene.counts), value = T)]
), decreasing = TRUE))
} else{
dat2 <- log2(sort(rowMeans(
s2@gene.counts[, grep(pattern = p2, x = colnames(s2@gene.counts), value = T)]
), decreasing = TRUE))
}
genes <- union(names(dat1)[1:num], names(dat2)[1:num])
x <- dat1[genes]
y <- dat2[genes]
x[is.infinite(x)] <- 0
y[is.infinite(y)] <- 0
fit <- lm(y ~ x)
fitr <- MASS::rlm(y ~ x)
cor <- cor.test(x,y)
cr <- robcor(x,y)
tl <- paste0(
"r = ", round(cor$estimate, 2),
"; p", ifelse(cor$p.value > 0.001, paste(" =", cor$p.value), " < 0.001"),
" robcor: ", round(cr, 2)
)
plot_ly() %>%
add_trace(x = x, y = y, mode = "markers", name = "Genes") %>%
add_trace(x = x, y = predict(fit), mode = "lines", name = "lm") %>%
add_trace(x = x, y = predict(fitr), mode = "lines", name = "rlm") %>%
layout(
xaxis = list(title = n1),
yaxis = list(title = n2),
title = paste("Pearson's correlation:", tl)
)
}PND7 vs PND8 (old)
plot_expr(s1 = lab, s2 = lit, p1 = "PND8", p2 = "PND7", n1 = "PND8 (old)", n2 = "PND7 (lit)")PND7 vs PND8 (new)
plot_expr(s1 = ns, s2 = lit, p1 = "PND8", p2 = "PND7", n1 = "PND8 (new)", n2 = "PND7 (lit)")PND8 (old) vs PND8 (new)
plot_expr(s1 = ns, s2 = lab, p1 = "PND8", p2 = "PND8", n1 = "PND8 (new)", n2 = "PND8 (old)")Expression comparison of PND14/15
PND14 vs PND15 (old)
plot_expr(s1 = lab, s2 = lit, p1 = "PND15", p2 = "PND14", n1 = "PND15 (old)", n2 = "PND14 (lit)")PND14 vs PND15 (new)
plot_expr(s1 = ns, s2 = lit, p1 = "PND15", p2 = "PND14", n1 = "PND15 (new)", n2 = "PND14 (lit)")PND15 (old) vs PND15 (new)
plot_expr(s1 = ns, s2 = lab, p1 = "PND15", p2 = "PND15", n1 = "PND15 (new)", n2 = "PND15 (old)")Expression comparison of PNW8/21
PNW8 vs PNW21
plot_expr(s1 = ns, s2 = lit, p1 = "PNW21", p2 = "PNW8", n1 = "PNW21 (new)", n2 = "PNW8 (lit)")Heatmap of some markers
genes <- data.frame(
Category = c(
rep("Germ cell licensing", 5),
rep("Undifferentiated Spermatogonia markers", 12),
"Sertoli cell marker",
"Leyidig cell marker",
"Translational initiation factor",
"Translational initiation factor",
"Housekeeping gene"
),
Genes = c(
"Nanos2", "Nanos3", "Dazl", "Ddx4", "Dmrt1",
"Taf4b", "Neurog3", "Utf1", "Id4", "Zbtb16", "Pou5f1",
"Lin28a", "Bcl6b", "Sall4", "Sohlh1", "Foxo1", "Gfra1",
"Gata4", "Lhcgr",
"Eif4h", "Eif4e", "Gapdh"
), stringsAsFactors = F
)
rownames(genes) <- genes$Genes
genes1 <- data.frame(
Category = c(
rep("Stem cell markers", 5),
rep("Germline pluripotency marker", 7),
rep("Progenitor-associated marker", 3),
"Leyidig cell marker",
rep("Sertoli cell marker", 2)
),
Genes = c(
"Lhx1", "Id4", "Gfra1", "Etv5", "Ret",
"Bcl6b", "Foxo1", "Sall4", "Dmrt1", "Zbtb16", "Dazl", "Ddx4",
"Neurog3", "Sohlh1", "Lin28b",
"Lhcgr",
"Gata4", "Vim"
), stringsAsFactors = F
)
rownames(genes1) <- genes1$Genes
genes <- genes[!rownames(genes) %in% intersect(genes1$Genes, genes$Genes),]
genes <- rbind(genes, genes1)
genes$Category <- factor(genes$Category, unique(genes$Category)[c(2,5,6,1,7,8,9,3,4)])
genes_sp <- split(genes, genes$Category)
genes <- ldply(genes_sp[unique(genes$Category)[c(2,5,6,1,7,8,9,3,4)]], .id = NULL)
rownames(genes) <- genes$Genes
tab <- data.frame(lit@gene.counts, lab@gene.counts, ns@gene.counts)
tab <- DGEList(tab)
tab <- calcNormFactors(tab)
cpm <- cpm(tab, log = T)
tab <- cpm[genes$Genes, ]
tab[mapply(is.infinite, tab)] <- 0
tab <- tab[, c(
grep(pattern = "PND7", x = colnames(tab), value = T),
grep(pattern = "PND8_[0-9][0-9]$", x = colnames(tab), value = T),
grep(pattern = "PND8_[0-9][0-9]_", x = colnames(tab), value = T),
grep(pattern = "PND14", x = colnames(tab), value = T),
grep(pattern = "PND15_[0-9][0-9]$", x = colnames(tab), value = T),
grep(pattern = "PND15_[0-9][0-9]_", x = colnames(tab), value = T),
grep(pattern = "PNW8", x = colnames(tab), value = T),
grep(pattern = "PNW21", x = colnames(tab), value = T),
grep(pattern = "Adult", x = colnames(tab), value = T)
)]
lit@phenoData$Source <- "Literature"## Loading required package: plgINS
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## Warning: replacing previous import 'GenomicRanges::shift' by 'data.table::shift'
## when loading 'plgINS'
lab@phenoData$Source <- "Lab (old)"
annoCol <- rbind(lab@phenoData, lit@phenoData)
annoCol$LibPrepBatch <- NA
pn <- ns@phenoData
colnames(pn)[3] <- "Samples"
pn$Source <- "Lab (new)"
annoCol <- rbind(annoCol, pn)
annoCol$seqType <- "Ribo0"
annoCol$seqType[annoCol$Group == "Adult"] <- "polyA"
annoCol$Group[annoCol$Group == "Adult"] <- "PNW21"
annoCol$Group <- factor(annoCol$Group, unique(annoCol$Group)[c(5,3,4,2,6,1)])Heatmap
paletteLength <- 100
myColor <- colorRampPalette(rev(RColorBrewer::brewer.pal(11, "RdBu")))(paletteLength)
myBreaks <- c(seq(min(tab), 0, length.out=ceiling(paletteLength/2) + 1),
seq(max(tab)/paletteLength, max(tab), length.out=floor(paletteLength/2)))
pheatmap(
mat = tab, color = myColor, border_color = NA, breaks = myBreaks,
cluster_cols = F, cluster_rows = F,
gaps_col = c(16, 32),
gaps_row = c(4, 9,16, 18,21, 22, 24,26),
annotation_row = genes[, 1, drop = F],
annotation_col = annoCol[, c(1,3:5), drop = F], show_colnames = FALSE,
)SessionInfo
devtools::session_info()## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.4 (2021-02-15)
## os Ubuntu 16.04.7 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Europe/Zurich
## date 2021-07-05
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## annotate 1.68.0 2020-10-27 [1] Bioconductor
## AnnotationDbi 1.52.0 2020-10-27 [1] Bioconductor
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.4)
## Biobase 2.50.0 2020-10-27 [1] Bioconductor
## BiocGenerics 0.36.1 2021-04-16 [1] Bioconductor
## BiocParallel 1.24.1 2020-11-06 [1] Bioconductor
## Biostrings 2.58.0 2020-10-27 [1] Bioconductor
## bit 4.0.4 2020-08-04 [1] CRAN (R 4.0.4)
## bit64 4.0.5 2020-08-30 [1] CRAN (R 4.0.4)
## bitops 1.0-7 2021-04-24 [1] CRAN (R 4.0.4)
## blob 1.2.1 2020-01-20 [1] CRAN (R 4.0.4)
## bookdown 0.22 2021-04-22 [1] CRAN (R 4.0.4)
## bslib 0.2.4 2021-01-25 [1] CRAN (R 4.0.4)
## cachem 1.0.4 2021-02-13 [1] CRAN (R 4.0.4)
## callr 3.7.0 2021-04-20 [1] CRAN (R 4.0.4)
## cli 2.5.0 2021-04-26 [1] CRAN (R 4.0.4)
## colorspace 2.0-0 2020-11-11 [1] CRAN (R 4.0.4)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.4)
## crosstalk 1.1.1 2021-01-12 [1] CRAN (R 4.0.4)
## data.table 1.14.0 2021-02-21 [1] CRAN (R 4.0.4)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.4)
## DelayedArray 0.16.3 2021-03-24 [1] Bioconductor
## desc 1.3.0 2021-03-05 [1] CRAN (R 4.0.4)
## DESeq2 1.30.1 2021-02-19 [1] Bioconductor
## devtools 2.4.2 2021-06-07 [1] CRAN (R 4.0.4)
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.4)
## dplyr 1.0.5 2021-03-05 [1] CRAN (R 4.0.4)
## edgeR * 3.32.1 2021-01-14 [1] Bioconductor
## ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.4)
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.4)
## fansi 0.4.2 2021-01-15 [1] CRAN (R 4.0.4)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.0.4)
## fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.0.4)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.4)
## genefilter 1.72.1 2021-01-21 [1] Bioconductor
## geneplotter 1.68.0 2020-10-27 [1] Bioconductor
## generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.4)
## GenomeInfoDb 1.26.7 2021-04-08 [1] Bioconductor
## GenomeInfoDbData 1.2.4 2021-03-30 [1] Bioconductor
## GenomicRanges 1.42.0 2020-10-27 [1] Bioconductor
## GEOquery 2.58.0 2020-10-27 [1] Bioconductor
## ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 4.0.4)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.4)
## gridExtra 2.3 2017-09-09 [1] CRAN (R 4.0.4)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.4)
## highr 0.9 2021-04-16 [1] CRAN (R 4.0.4)
## hms 1.0.0 2021-01-13 [1] CRAN (R 4.0.4)
## htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.4)
## htmlwidgets 1.5.3 2020-12-10 [1] CRAN (R 4.0.4)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.4)
## IRanges 2.24.1 2020-12-12 [1] Bioconductor
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.4)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.4)
## knitr 1.33 2021-04-24 [1] CRAN (R 4.0.4)
## lattice 0.20-41 2020-04-02 [1] CRAN (R 4.0.4)
## lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.0.4)
## lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.4)
## limma * 3.46.0 2020-10-27 [1] Bioconductor
## locfit 1.5-9.4 2020-03-25 [1] CRAN (R 4.0.4)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.4)
## MASS 7.3-53.1 2021-02-12 [1] CRAN (R 4.0.4)
## Matrix 1.3-2 2021-01-06 [1] CRAN (R 4.0.4)
## MatrixGenerics 1.2.1 2021-01-30 [1] Bioconductor
## matrixStats 0.58.0 2021-01-29 [1] CRAN (R 4.0.4)
## memoise 2.0.0 2021-01-26 [1] CRAN (R 4.0.4)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.4)
## pheatmap * 1.0.12 2019-01-04 [1] CRAN (R 4.0.4)
## pillar 1.6.0 2021-04-13 [1] CRAN (R 4.0.4)
## pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 4.0.4)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.4)
## pkgload 1.2.1 2021-04-06 [1] CRAN (R 4.0.4)
## plgINS * 0.1.5 2021-05-03 [1] local
## plotly * 4.9.3 2021-01-10 [1] CRAN (R 4.0.4)
## plyr * 1.8.6 2020-03-03 [1] CRAN (R 4.0.4)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.4)
## processx 3.5.1 2021-04-04 [1] CRAN (R 4.0.4)
## ps 1.6.0 2021-02-28 [1] CRAN (R 4.0.4)
## purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.4)
## R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.4)
## RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 4.0.4)
## Rcpp 1.0.6 2021-01-15 [1] CRAN (R 4.0.4)
## RCurl 1.98-1.3 2021-03-16 [1] CRAN (R 4.0.4)
## readr 1.4.0 2020-10-05 [1] CRAN (R 4.0.4)
## remotes 2.3.0 2021-04-01 [1] CRAN (R 4.0.4)
## rlang 0.4.10 2020-12-30 [1] CRAN (R 4.0.4)
## rmarkdown 2.7 2021-02-19 [1] CRAN (R 4.0.4)
## rmdformats 1.0.1 2021-01-13 [1] CRAN (R 4.0.4)
## robcor * 0.1-6 2014-01-06 [1] CRAN (R 4.0.4)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.4)
## RSQLite 2.2.7 2021-04-22 [1] CRAN (R 4.0.4)
## S4Vectors 0.28.1 2020-12-09 [1] Bioconductor
## sass 0.3.1 2021-01-24 [1] CRAN (R 4.0.4)
## scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.4)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.4)
## SRAdb 1.52.0 2020-10-27 [1] Bioconductor
## stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.4)
## stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.4)
## SummarizedExperiment 1.20.0 2020-10-27 [1] Bioconductor
## survival 3.2-11 2021-04-26 [1] CRAN (R 4.0.4)
## testthat 3.0.2 2021-02-14 [1] CRAN (R 4.0.4)
## tibble 3.1.1 2021-04-18 [1] CRAN (R 4.0.4)
## tidyr 1.1.3 2021-03-03 [1] CRAN (R 4.0.4)
## tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.4)
## usethis 2.0.1 2021-02-10 [1] CRAN (R 4.0.4)
## utf8 1.2.1 2021-03-12 [1] CRAN (R 4.0.4)
## vctrs 0.3.7 2021-03-29 [1] CRAN (R 4.0.4)
## viridis * 0.6.0 2021-04-15 [1] CRAN (R 4.0.4)
## viridisLite * 0.4.0 2021-04-13 [1] CRAN (R 4.0.4)
## withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.4)
## xfun 0.24 2021-06-15 [1] CRAN (R 4.0.4)
## XML 3.99-0.6 2021-03-16 [1] CRAN (R 4.0.4)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.4)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.0.4)
## XVector 0.30.0 2020-10-27 [1] Bioconductor
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.4)
## zlibbioc 1.36.0 2020-10-27 [1] Bioconductor
##
## [1] /home/ubuntu/R/x86_64-pc-linux-gnu-library/4.0
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library